import pandas as pd
import numpy as npPandas 기본기 | 데이터프레임 핸들링
열 이름 변경, 열 추가, 리스트 컴프리헨션, 결측치 파악,
query, 매핑 등등… 해당 내용은 왠만해선 다 알아두는 게 좋다.
해당 포스트는 전북대학교 통계학과 최규빈 교수님의 강의내용을 토대로 재구성되었음을 알립니다.
1. import
2. Pandas 기본기능
A. 열의 이름 변경
df = pd.DataFrame(np.random.randn(3, 2))
df| 0 | 1 | |
|---|---|---|
| 0 | -0.000655 | 0.686701 |
| 1 | 0.591774 | 0.842045 |
| 2 | -0.027722 | -0.703161 |
- 방법1 : df.columns에 대입
df.columns = ['A', 'B']
df| A | B | |
|---|---|---|
| 0 | -0.000655 | 0.686701 |
| 1 | 0.591774 | 0.842045 |
| 2 | -0.027722 | -0.703161 |
- 방법2 : df.set_axis() \(\star\star\star\)
df2 = pd.DataFrame(np.random.randn(5,3))
df2| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 0.200618 | -0.567175 | -0.249051 |
| 1 | 0.805185 | -0.479624 | 0.797904 |
| 2 | -1.278647 | -0.061503 | 1.048704 |
| 3 | 0.308626 | -3.294418 | 0.326681 |
| 4 | 1.585979 | -1.200001 | 0.386765 |
df2 = df2.set_axis(['A','B','C'], axis = 1)
df2
#df2.set_axis(['a','b','c','d,',e'], axis = 0)으로 하면 인덱스가 바뀐다.| A | B | C | |
|---|---|---|---|
| 0 | 0.200618 | -0.567175 | -0.249051 |
| 1 | 0.805185 | -0.479624 | 0.797904 |
| 2 | -1.278647 | -0.061503 | 1.048704 |
| 3 | 0.308626 | -3.294418 | 0.326681 |
| 4 | 1.585979 | -1.200001 | 0.386765 |
- 방법3 : df.rename()
df3 = pd.DataFrame(np.random.randn(5,3))
df3| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 0.202540 | 0.265273 | 1.855420 |
| 1 | -0.422516 | -0.954117 | -0.050532 |
| 2 | -0.010961 | -1.681503 | -1.613766 |
| 3 | 0.855199 | 0.773191 | 1.149413 |
| 4 | 0.310184 | -0.063591 | -0.572836 |
df3.rename({0 : 'A'}, axis = 1) ## dictionary 형태로 지정, 특정 열만 바꿈
##df3.rename(columns = {0 : 'A'})와 동일| A | 1 | 2 | |
|---|---|---|---|
| 0 | 0.202540 | 0.265273 | 1.855420 |
| 1 | -0.422516 | -0.954117 | -0.050532 |
| 2 | -0.010961 | -1.681503 | -1.613766 |
| 3 | 0.855199 | 0.773191 | 1.149413 |
| 4 | 0.310184 | -0.063591 | -0.572836 |
### B. 행의 이름 변경
- 방법1 : df.index에 대입
df = pd.DataFrame(np.random.randn(2,3))
df| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 0.108275 | -0.802206 | -3.011323 |
| 1 | -1.437775 | -1.868590 | -0.079212 |
df.index = ['a', 'b']
df| 0 | 1 | 2 | |
|---|---|---|---|
| a | 0.108275 | -0.802206 | -3.011323 |
| b | -1.437775 | -1.868590 | -0.079212 |
- 방법2 : df.set_axis() \(\star\star\star\)
df = pd.DataFrame(np.random.randn(2,3))
df| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | -0.179379 | 0.684650 | 1.678079 |
| 1 | 0.487614 | -1.358992 | -0.661587 |
df.set_axis(['1','2'], axis = 0)| 0 | 1 | 2 | |
|---|---|---|---|
| 1 | -0.179379 | 0.684650 | 1.678079 |
| 2 | 0.487614 | -1.358992 | -0.661587 |
- 방법3 : df.rename()
df = pd.DataFrame(np.random.randn(2,3))
df| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | -0.051285 | 1.185885 | 0.841335 |
| 1 | 0.118555 | 1.527457 | 0.544870 |
df.rename({0 : 'A'}, axis = 0) ## default = 0| 0 | 1 | 2 | |
|---|---|---|---|
| A | -0.051285 | 1.185885 | 0.841335 |
| 1 | 0.118555 | 1.527457 | 0.544870 |
- 방법 4 : df.set_index() > 임의의 열을 행이름으로 지정, 이미 있던 열 하나를 인덱스로 잡고 싶을 시 사용
df = pd.DataFrame({'id':['2020-43052','2021-43053'], 'X1':[1,2],'X2':[2,3]})
df| id | X1 | X2 | |
|---|---|---|---|
| 0 | 2020-43052 | 1 | 2 |
| 1 | 2021-43053 | 2 | 3 |
df.set_index('id')| X1 | X2 | |
|---|---|---|
| id | ||
| 2020-43052 | 1 | 2 |
| 2021-43053 | 2 | 3 |
# A~B에 대한 연습문제
- 데이터 load
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
df.head()| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189cm | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179cm | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172cm | 69kg | €97.7M | 19.0 | NaN |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181cm | 70kg | €198.9M | 17.0 | NaN |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172cm | 68kg | €154.4M | 23.0 | NaN |
5 rows × 29 columns
# 예제1 : 열의 이름을 출력하고, 열의 이름중 공백()이 있을 경우 언더바(_) 로 바꾸자.
df.columnsIndex(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
'Preferred Foot', 'International Reputation', 'Weak Foot',
'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
'Joined', 'Loaned From', 'Contract Valid Until', 'Height', 'Weight',
'Release Clause', 'Kit Number', 'Best Overall Rating'],
dtype='object')
- 방법1 : df.columns에 직접 대입
df_ = df
df_.columns = [i.replace(' ', '_') for i in df_.columns]df_.columnsIndex(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
'Potential', 'Club', 'Club_Logo', 'Value', 'Wage', 'Special',
'Preferred_Foot', 'International_Reputation', 'Weak_Foot',
'Skill_Moves', 'Work_Rate', 'Body_Type', 'Real_Face', 'Position',
'Joined', 'Loaned_From', 'Contract_Valid_Until', 'Height', 'Weight',
'Release_Clause', 'Kit_Number', 'Best_Overall_Rating'],
dtype='object')
- 방법2 : set_axis() 이용 \(\star\star\star\)
df_ = df
df_.set_axis([col.replace(' ', '_') for col in df_.columns], axis = 1).columnsIndex(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
'Potential', 'Club', 'Club_Logo', 'Value', 'Wage', 'Special',
'Preferred_Foot', 'International_Reputation', 'Weak_Foot',
'Skill_Moves', 'Work_Rate', 'Body_Type', 'Real_Face', 'Position',
'Joined', 'Loaned_From', 'Contract_Valid_Until', 'Height', 'Weight',
'Release_Clause', 'Kit_Number', 'Best_Overall_Rating'],
dtype='object')
- 방법 3 : rename() 이용(안중요함)
temp3 = df
dic = {i:i.replace(' ','_') for i in df.columns}
temp3.rename(dic, axis = 1).columnsIndex(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
'Potential', 'Club', 'Club_Logo', 'Value', 'Wage', 'Special',
'Preferred_Foot', 'International_Reputation', 'Weak_Foot',
'Skill_Moves', 'Work_Rate', 'Body_Type', 'Real_Face', 'Position',
'Joined', 'Loaned_From', 'Contract_Valid_Until', 'Height', 'Weight',
'Release_Clause', 'Kit_Number', 'Best_Overall_Rating'],
dtype='object')
# 예제2: ID를 row-index로 지정하라.
df.ID0 209658
1 212198
2 224334
3 192985
4 224232
...
17655 269526
17656 267946
17657 270567
17658 256624
17659 256376
Name: ID, Length: 17660, dtype: int64
- 방법1 : 직접지정
df_ = df
df_.index = df.ID
df_.indexIndex([209658, 212198, 224334, 192985, 224232, 212622, 197445, 187961, 208333,
210514,
...
256879, 269546, 267647, 253186, 267461, 269526, 267946, 270567, 256624,
256376],
dtype='int64', name='ID', length=17660)
- 방법2 : set_axis() \(\star\star\star\)
df_ = df
df_ = df_.set_axis(df.ID) ## default : axis = 0, df_.set_axis(df.ID, axis = 0)과 동일
df_.indexInt64Index([209658, 212198, 224334, 192985, 224232, 212622, 197445, 187961,
208333, 210514,
...
256879, 269546, 267647, 253186, 267461, 269526, 267946, 270567,
256624, 256376],
dtype='int64', name='ID', length=17660)
- 방법3 : set_index()
이 경우 해당 열을 나중에 따로 드랍하지 않아도 됨
df_ = df
df_ = df_.set_index('ID')
df_.indexIndex([209658, 212198, 224334, 192985, 224232, 212622, 197445, 187961, 208333,
210514,
...
256879, 269546, 267647, 253186, 267461, 269526, 267946, 270567, 256624,
256376],
dtype='int64', name='ID', length=17660)
### C. df.T | 데이터프레임을 전치
df.T를 이용하여 데이터를 살피면 편리함
- data load
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
df.head()| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189cm | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179cm | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172cm | 69kg | €97.7M | 19.0 | NaN |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181cm | 70kg | €198.9M | 17.0 | NaN |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172cm | 68kg | €154.4M | 23.0 | NaN |
5 rows × 29 columns
- df.T : 데이터프레임을 전치(transition)한다.
df.T.loc[:,:3]| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| ID | 209658 | 212198 | 224334 | 192985 |
| Name | L. Goretzka | Bruno Fernandes | M. Acuña | K. De Bruyne |
| Age | 27 | 27 | 30 | 31 |
| Photo | https://cdn.sofifa.net/players/209/658/23_60.png | https://cdn.sofifa.net/players/212/198/23_60.png | https://cdn.sofifa.net/players/224/334/23_60.png | https://cdn.sofifa.net/players/192/985/23_60.png |
| Nationality | Germany | Portugal | Argentina | Belgium |
| Flag | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/flags/pt.png | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/be.png |
| Overall | 87 | 86 | 85 | 91 |
| Potential | 88 | 87 | 85 | 91 |
| Club | FC Bayern München | Manchester United | Sevilla FC | Manchester City |
| Club Logo | https://cdn.sofifa.net/teams/21/30.png | https://cdn.sofifa.net/teams/11/30.png | https://cdn.sofifa.net/teams/481/30.png | https://cdn.sofifa.net/teams/10/30.png |
| Value | €91M | €78.5M | €46.5M | €107.5M |
| Wage | €115K | €190K | €46K | €350K |
| Special | 2312 | 2305 | 2303 | 2303 |
| Preferred Foot | Right | Right | Left | Right |
| International Reputation | 4.0 | 3.0 | 2.0 | 4.0 |
| Weak Foot | 4.0 | 3.0 | 3.0 | 5.0 |
| Skill Moves | 3.0 | 4.0 | 3.0 | 4.0 |
| Work Rate | High/ Medium | High/ High | High/ High | High/ High |
| Body Type | Unique | Unique | Stocky (170-185) | Unique |
| Real Face | Yes | Yes | No | Yes |
| Position | <span class="pos pos28">SUB | <span class="pos pos15">LCM | <span class="pos pos7">LB | <span class="pos pos13">RCM |
| Joined | Jul 1, 2018 | Jan 30, 2020 | Sep 14, 2020 | Aug 30, 2015 |
| Loaned From | NaN | NaN | NaN | NaN |
| Contract Valid Until | 2026 | 2026 | 2024 | 2025 |
| Height | 189cm | 179cm | 172cm | 181cm |
| Weight | 82kg | 69kg | 69kg | 70kg |
| Release Clause | €157M | €155M | €97.7M | €198.9M |
| Kit Number | 8.0 | 8.0 | 19.0 | 17.0 |
| Best Overall Rating | NaN | NaN | NaN | NaN |
- 출력옵션 조정
pd.options.display.max_rows = 10
display(df.T.iloc[:, :3])
pd.reset_option('display.max_rows') ## 디폴트 옵션으로 변경| 0 | 1 | 2 | |
|---|---|---|---|
| ID | 209658 | 212198 | 224334 |
| Name | L. Goretzka | Bruno Fernandes | M. Acuña |
| Age | 27 | 27 | 30 |
| Photo | https://cdn.sofifa.net/players/209/658/23_60.png | https://cdn.sofifa.net/players/212/198/23_60.png | https://cdn.sofifa.net/players/224/334/23_60.png |
| Nationality | Germany | Portugal | Argentina |
| ... | ... | ... | ... |
| Height | 189cm | 179cm | 172cm |
| Weight | 82kg | 69kg | 69kg |
| Release Clause | €157M | €155M | €97.7M |
| Kit Number | 8.0 | 8.0 | 19.0 |
| Best Overall Rating | NaN | NaN | NaN |
29 rows × 3 columns
여기선 설명을 위해 줄이는 옵션을 사용했지만, 보통은 늘려서 사용함.
### D. df.dtypes, s,dtype | 데이터의 타입 산출
- df.dtypes
데이터프레임 각 열에 저장된 데이터들의 타입을 알려준다.
df.dtypesID int64
Name object
Age int64
Photo object
Nationality object
Flag object
Overall int64
Potential int64
Club object
Club Logo object
Value object
Wage object
Special int64
Preferred Foot object
International Reputation float64
Weak Foot float64
Skill Moves float64
Work Rate object
Body Type object
Real Face object
Position object
Joined object
Loaned From object
Contract Valid Until object
Height object
Weight object
Release Clause object
Kit Number float64
Best Overall Rating object
dtype: object
object : string이라고 생각해도 무방. 범주형 자료.
- s.dtype Series에 붙여 사용
df.Name.dtype ## 한 행의 데이터 타입만을 산출dtype('O')
- 다양한 활용이 가능
df.Name.dtype == np.object_True
df.Age.dtype == np.int64True
df['International Reputation'].dtype == np.float64True
bool을 산출하니까 컴프리헨션에 조건문 걸어서 해도 되고… 활용의 여지가 넓다.
# 예제: df에서 int64 자료형만 출력
- 풀이 1 : 표를 보고 직접 뽑음
pd.Series(list(df.dtypes))0 int64
1 object
2 int64
3 object
4 object
5 object
6 int64
7 int64
8 object
9 object
10 object
11 object
12 int64
13 object
14 float64
15 float64
16 float64
17 object
18 object
19 object
20 object
21 object
22 object
23 object
24 object
25 object
26 object
27 float64
28 object
dtype: object
df.iloc[:, [0,2,6,7,12]]| ID | Age | Overall | Potential | Special | |
|---|---|---|---|---|---|
| 0 | 209658 | 27 | 87 | 88 | 2312 |
| 1 | 212198 | 27 | 86 | 87 | 2305 |
| 2 | 224334 | 30 | 85 | 85 | 2303 |
| 3 | 192985 | 31 | 91 | 91 | 2303 |
| 4 | 224232 | 25 | 86 | 89 | 2296 |
| ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | 19 | 48 | 61 | 762 |
| 17656 | 267946 | 17 | 48 | 64 | 761 |
| 17657 | 270567 | 25 | 51 | 56 | 759 |
| 17658 | 256624 | 18 | 50 | 65 | 758 |
| 17659 | 256376 | 20 | 50 | 61 | 749 |
17660 rows × 5 columns
- 풀이 2 : 리스트 컴프리핸션 이용
df.loc[:, [o == np.int64 for o in df.dtypes]]
##df.loc[:, [o == 'int64' for o in df.dtypes]]| ID | Age | Overall | Potential | Special | |
|---|---|---|---|---|---|
| 0 | 209658 | 27 | 87 | 88 | 2312 |
| 1 | 212198 | 27 | 86 | 87 | 2305 |
| 2 | 224334 | 30 | 85 | 85 | 2303 |
| 3 | 192985 | 31 | 91 | 91 | 2303 |
| 4 | 224232 | 25 | 86 | 89 | 2296 |
| ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | 19 | 48 | 61 | 762 |
| 17656 | 267946 | 17 | 48 | 64 | 761 |
| 17657 | 270567 | 25 | 51 | 56 | 759 |
| 17658 | 256624 | 18 | 50 | 65 | 758 |
| 17659 | 256376 | 20 | 50 | 61 | 749 |
17660 rows × 5 columns
### E. df.sort_values() | 데이터들을 정렬
- 예시1 : 순서대로 정렬
df.sort_values('Age') ## 나이가 어린 순서대로 오름차순 정렬 / 인덱스 개판| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17636 | 263636 | 22 D. Oncescu | 15 | https://cdn.sofifa.net/players/263/636/22_60.png | Romania | https://cdn.sofifa.net/flags/ro.png | 50 | 72 | FC Dinamo 1948 Bucureşti | https://cdn.sofifa.net/teams/100757/30.png | ... | No | <span class="pos pos29">RES | Jun 1, 2021 | NaN | 2025 | 190cm | 77kg | €306K | 34.0 | NaN |
| 13712 | 271072 | E. Topcu | 16 | https://cdn.sofifa.net/players/271/072/23_60.png | Republic of Ireland | https://cdn.sofifa.net/flags/ie.png | 48 | 58 | Drogheda United | https://cdn.sofifa.net/teams/1572/30.png | ... | No | <span class="pos pos29">RES | Jul 8, 2022 | NaN | 2022 | 183cm | 65kg | €175K | 20.0 | NaN |
| 13078 | 259442 | 22 R. van den Berg | 16 | https://cdn.sofifa.net/players/259/442/22_60.png | Netherlands | https://cdn.sofifa.net/flags/nl.png | 60 | 81 | PEC Zwolle | https://cdn.sofifa.net/teams/1914/30.png | ... | No | <span class="pos pos29">RES | May 24, 2020 | NaN | 2024 | 190cm | 73kg | €1.8M | 33.0 | NaN |
| 11257 | 266205 | 22 Y. Koré | 16 | https://cdn.sofifa.net/players/266/205/22_60.png | France | https://cdn.sofifa.net/flags/fr.png | 59 | 74 | Paris FC | https://cdn.sofifa.net/teams/111817/30.png | ... | No | <span class="pos pos29">RES | Aug 11, 2022 | NaN | 2025 | 187cm | 75kg | €1.1M | 34.0 | NaN |
| 11278 | 261873 | 21 H. Kumagai | 16 | https://cdn.sofifa.net/players/261/873/21_60.png | Japan | https://cdn.sofifa.net/flags/jp.png | 52 | 70 | Vegalta Sendai | https://cdn.sofifa.net/teams/112836/30.png | ... | No | <span class="pos pos29">RES | Apr 16, 2021 | NaN | 2023 | 174cm | 64kg | €375K | 48.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16311 | 254196 | 21 L. Fernández | 42 | https://cdn.sofifa.net/players/254/196/21_60.png | Colombia | https://cdn.sofifa.net/flags/co.png | 61 | 61 | Sociedad Deportiva Aucas | https://cdn.sofifa.net/teams/110987/30.png | ... | No | <span class="pos pos28">SUB | Jan 29, 2018 | NaN | 2024 | 187cm | 82kg | €75K | 1.0 | NaN |
| 16036 | 216692 | S. Torrico | 42 | https://cdn.sofifa.net/players/216/692/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 72 | 72 | San Lorenzo de Almagro | https://cdn.sofifa.net/teams/1013/30.png | ... | No | <span class="pos pos0">GK | Apr 25, 2013 | NaN | 2022 | 183cm | 84kg | €375K | 12.0 | NaN |
| 17257 | 645 | 17 D. Andersson | 43 | https://cdn.sofifa.net/players/000/645/17_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 57 | 57 | Helsingborgs IF | https://cdn.sofifa.net/teams/432/30.png | ... | No | <span class="pos pos28">SUB | Apr 21, 2016 | NaN | 2022 | 187cm | 85kg | NaN | 39.0 | NaN |
| 15375 | 1179 | G. Buffon | 44 | https://cdn.sofifa.net/players/001/179/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 79 | 79 | Parma | https://cdn.sofifa.net/teams/50/30.png | ... | Yes | <span class="pos pos0">GK | Jul 1, 2021 | NaN | 2024 | 192cm | 92kg | €3M | 1.0 | NaN |
| 15272 | 254704 | 22 K. Miura | 54 | https://cdn.sofifa.net/players/254/704/22_60.png | Japan | https://cdn.sofifa.net/flags/jp.png | 56 | 56 | Yokohama FC | https://cdn.sofifa.net/teams/113197/30.png | ... | No | <span class="pos pos29">RES | Jul 1, 2005 | NaN | 2022 | 177cm | 72kg | NaN | 11.0 | NaN |
17660 rows × 29 columns
- 예시 2 : 내림차순으로 정렬
df.sort_values('Age', ascending = False) ## default : ascending = True| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15272 | 254704 | 22 K. Miura | 54 | https://cdn.sofifa.net/players/254/704/22_60.png | Japan | https://cdn.sofifa.net/flags/jp.png | 56 | 56 | Yokohama FC | https://cdn.sofifa.net/teams/113197/30.png | ... | No | <span class="pos pos29">RES | Jul 1, 2005 | NaN | 2022 | 177cm | 72kg | NaN | 11.0 | NaN |
| 15375 | 1179 | G. Buffon | 44 | https://cdn.sofifa.net/players/001/179/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 79 | 79 | Parma | https://cdn.sofifa.net/teams/50/30.png | ... | Yes | <span class="pos pos0">GK | Jul 1, 2021 | NaN | 2024 | 192cm | 92kg | €3M | 1.0 | NaN |
| 17257 | 645 | 17 D. Andersson | 43 | https://cdn.sofifa.net/players/000/645/17_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 57 | 57 | Helsingborgs IF | https://cdn.sofifa.net/teams/432/30.png | ... | No | <span class="pos pos28">SUB | Apr 21, 2016 | NaN | 2022 | 187cm | 85kg | NaN | 39.0 | NaN |
| 16036 | 216692 | S. Torrico | 42 | https://cdn.sofifa.net/players/216/692/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 72 | 72 | San Lorenzo de Almagro | https://cdn.sofifa.net/teams/1013/30.png | ... | No | <span class="pos pos0">GK | Apr 25, 2013 | NaN | 2022 | 183cm | 84kg | €375K | 12.0 | NaN |
| 16311 | 254196 | 21 L. Fernández | 42 | https://cdn.sofifa.net/players/254/196/21_60.png | Colombia | https://cdn.sofifa.net/flags/co.png | 61 | 61 | Sociedad Deportiva Aucas | https://cdn.sofifa.net/teams/110987/30.png | ... | No | <span class="pos pos28">SUB | Jan 29, 2018 | NaN | 2024 | 187cm | 82kg | €75K | 1.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17360 | 261023 | 21 H. Broun | 16 | https://cdn.sofifa.net/players/261/023/21_60.png | Scotland | https://cdn.sofifa.net/flags/gb-sct.png | 52 | 72 | Kilmarnock | https://cdn.sofifa.net/teams/82/30.png | ... | No | <span class="pos pos29">RES | Sep 17, 2020 | NaN | 2022 | 182cm | 70kg | €523K | 40.0 | NaN |
| 15536 | 263639 | 22 M. Pavel | 16 | https://cdn.sofifa.net/players/263/639/22_60.png | Romania | https://cdn.sofifa.net/flags/ro.png | 51 | 69 | FC Dinamo 1948 Bucureşti | https://cdn.sofifa.net/teams/100757/30.png | ... | No | <span class="pos pos29">RES | Jul 1, 2021 | NaN | 2023 | 178cm | 66kg | €277K | 77.0 | NaN |
| 11398 | 256405 | 21 W. Essanoussi | 16 | https://cdn.sofifa.net/players/256/405/21_60.png | Netherlands | https://cdn.sofifa.net/flags/nl.png | 59 | 75 | VVV-Venlo | https://cdn.sofifa.net/teams/100651/30.png | ... | No | <span class="pos pos29">RES | Jul 1, 2019 | NaN | 2022 | 178cm | 70kg | €1.1M | 24.0 | NaN |
| 15030 | 270594 | T. Walczak | 16 | https://cdn.sofifa.net/players/270/594/23_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 54 | 68 | Wisła Płock | https://cdn.sofifa.net/teams/1569/30.png | ... | No | <span class="pos pos29">RES | Sep 7, 2021 | NaN | 2023 | 191cm | 88kg | €494K | 99.0 | NaN |
| 17636 | 263636 | 22 D. Oncescu | 15 | https://cdn.sofifa.net/players/263/636/22_60.png | Romania | https://cdn.sofifa.net/flags/ro.png | 50 | 72 | FC Dinamo 1948 Bucureşti | https://cdn.sofifa.net/teams/100757/30.png | ... | No | <span class="pos pos29">RES | Jun 1, 2021 | NaN | 2025 | 190cm | 77kg | €306K | 34.0 | NaN |
17660 rows × 29 columns
- 예시 3 : 능력치가 좋은 순서대로 정렬
df.sort_values(by = 'Overall', ascending = False) ## 수가 높을수록 위로 가니까, by 생략해도 됨.| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 188545 | R. Lewandowski | 33 | https://cdn.sofifa.net/players/188/545/23_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 91 | 91 | FC Barcelona | https://cdn.sofifa.net/teams/241/30.png | ... | Yes | <span class="pos pos25">ST | Jul 18, 2022 | NaN | 2025 | 185cm | 81kg | €172.2M | 9.0 | NaN |
| 124 | 165153 | K. Benzema | 34 | https://cdn.sofifa.net/players/165/153/23_60.png | France | https://cdn.sofifa.net/flags/fr.png | 91 | 91 | Real Madrid CF | https://cdn.sofifa.net/teams/243/30.png | ... | Yes | <span class="pos pos21">CF | Jul 9, 2009 | NaN | 2023 | 185cm | 81kg | €131.2M | 9.0 | NaN |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181cm | 70kg | €198.9M | 17.0 | NaN |
| 56 | 158023 | L. Messi | 35 | https://cdn.sofifa.net/players/158/023/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 91 | 91 | Paris Saint-Germain | https://cdn.sofifa.net/teams/73/30.png | ... | Yes | <span class="pos pos23">RW | Aug 10, 2021 | NaN | 2023 | 169cm | 67kg | €99.9M | 30.0 | NaN |
| 75 | 231747 | K. Mbappé | 23 | https://cdn.sofifa.net/players/231/747/23_60.png | France | https://cdn.sofifa.net/flags/fr.png | 91 | 95 | Paris Saint-Germain | https://cdn.sofifa.net/teams/73/30.png | ... | Yes | <span class="pos pos25">ST | Jul 1, 2018 | NaN | 2025 | 182cm | 73kg | €366.7M | 7.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 15513 | 266751 | 22 Jung Ho Yeon | 20 | https://cdn.sofifa.net/players/266/751/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 45 | 53 | GwangJu FC | https://cdn.sofifa.net/teams/112258/30.png | ... | No | <span class="pos pos29">RES | Jan 20, 2022 | NaN | 2026 | 180cm | 73kg | €145K | 23.0 | NaN |
| 16215 | 268279 | 22 J. Looschen | 24 | https://cdn.sofifa.net/players/268/279/22_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 44 | 47 | SV Meppen | https://cdn.sofifa.net/teams/110597/30.png | ... | No | <span class="pos pos29">RES | Mar 19, 2022 | NaN | 2026 | 178cm | 78kg | €92K | 42.0 | NaN |
| 16042 | 255283 | 20 Kim Yeong Geun | 22 | https://cdn.sofifa.net/players/255/283/20_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 44 | 49 | Gyeongnam FC | https://cdn.sofifa.net/teams/111588/30.png | ... | No | <span class="pos pos29">RES | Jan 9, 2020 | NaN | 2020 | 174cm | 71kg | €53K | 43.0 | NaN |
| 14634 | 269038 | 22 Zhang Wenxuan | 16 | https://cdn.sofifa.net/players/269/038/22_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 44 | 59 | Guangzhou FC | https://cdn.sofifa.net/teams/111839/30.png | ... | No | <span class="pos pos29">RES | May 1, 2022 | NaN | 2022 | 175cm | 70kg | €239K | 29.0 | NaN |
| 17618 | 168933 | 07 I. Paskov | 33 | https://cdn.sofifa.net/players/168/933/07_60.png | Bulgaria | https://cdn.sofifa.net/flags/bg.png | 43 | 42 | NaN | https://cdn.sofifa.net/flags/bg.png | ... | NaN | <span class="pos pos28">SUB | NaN | NaN | NaN | 184cm | 79kg | NaN | 24.0 | NaN |
17660 rows × 29 columns
### F. df.info() | 행별 information 산출
시험에는 절대 안 낼 거지만 매우 중요한 것
df.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17660 entries, 0 to 17659
Data columns (total 29 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 17660 non-null int64
1 Name 17660 non-null object
2 Age 17660 non-null int64
3 Photo 17660 non-null object
4 Nationality 17660 non-null object
5 Flag 17660 non-null object
6 Overall 17660 non-null int64
7 Potential 17660 non-null int64
8 Club 17449 non-null object
9 Club Logo 17660 non-null object
10 Value 17660 non-null object
11 Wage 17660 non-null object
12 Special 17660 non-null int64
13 Preferred Foot 17660 non-null object
14 International Reputation 17660 non-null float64
15 Weak Foot 17660 non-null float64
16 Skill Moves 17660 non-null float64
17 Work Rate 17660 non-null object
18 Body Type 17622 non-null object
19 Real Face 17622 non-null object
20 Position 17625 non-null object
21 Joined 16562 non-null object
22 Loaned From 694 non-null object
23 Contract Valid Until 17299 non-null object
24 Height 17660 non-null object
25 Weight 17660 non-null object
26 Release Clause 16509 non-null object
27 Kit Number 17625 non-null float64
28 Best Overall Rating 21 non-null object
dtypes: float64(4), int64(5), object(20)
memory usage: 3.9+ MB
data들의 현황을 한눈에 파악하기 좋다.
df.iloc[:, [28]].sort_values('Best Overall Rating')| Best Overall Rating | |
|---|---|
| 13299 | <span class="bp3-tag p p-54">54</span> |
| 14366 | <span class="bp3-tag p p-56">56</span> |
| 16779 | <span class="bp3-tag p p-58">58</span> |
| 16968 | <span class="bp3-tag p p-58">58</span> |
| 16835 | <span class="bp3-tag p p-59">59</span> |
| ... | ... |
| 17655 | NaN |
| 17656 | NaN |
| 17657 | NaN |
| 17658 | NaN |
| 17659 | NaN |
17660 rows × 1 columns
df.loc[:, ['Best Overall Rating']].isna().sum()Best Overall Rating 17639
dtype: int64
결측치가 매우 많은 것을 볼 수 있다.
### G. df.isna()| 각 원소가 결측치인지 아닌지 산출
- 예시 1 : 열별로 결측치 카운트
df.isna() ## NaN 값이 있다면 True 산출| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 1 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 2 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 3 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 4 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 17656 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 17657 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 17658 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
| 17659 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | True | False | False | False | False | False | True |
17660 rows × 29 columns
df.isna().sum(axis = 0) ## default : axis = 0ID 0
Name 0
Age 0
Photo 0
Nationality 0
Flag 0
Overall 0
Potential 0
Club 211
Club Logo 0
Value 0
Wage 0
Special 0
Preferred Foot 0
International Reputation 0
Weak Foot 0
Skill Moves 0
Work Rate 0
Body Type 38
Real Face 38
Position 35
Joined 1098
Loaned From 16966
Contract Valid Until 361
Height 0
Weight 0
Release Clause 1151
Kit Number 35
Best Overall Rating 17639
dtype: int64
arr = np.array([(True, False), (True, False), (False, True)])
arrarray([[ True, False],
[ True, False],
[False, True]])
arr.shape(3, 2)
arr.sum(axis = 0) ## 열별로 합array([2, 1])
arr.sum(axis = 1) ## 행별로 합array([1, 1, 1])
- 예시2 : 결측치가 50% 이상인 열 출력
type(df.isna().mean() > 0.5) ## 이 값 자체가 시리즈이므로 리스트로 넣으면 안된다.pandas.core.series.Series
df.loc[:, df.isna().mean() > 0.5] ## [df.isna().mean() > 0.5]는 에러뜸| Loaned From | Best Overall Rating | |
|---|---|---|
| 0 | NaN | NaN |
| 1 | NaN | NaN |
| 2 | NaN | NaN |
| 3 | NaN | NaN |
| 4 | NaN | NaN |
| ... | ... | ... |
| 17655 | NaN | NaN |
| 17656 | NaN | NaN |
| 17657 | NaN | NaN |
| 17658 | NaN | NaN |
| 17659 | NaN | NaN |
17660 rows × 2 columns
### H. df.drop() | 특정 행이나 열을 drop
- 예시 1 : [0,1,2,3] 행을 drop
df.drop([0,1,2,3])
## df.drop([0,1,2,3], axis = 0)| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172cm | 68kg | €154.4M | 23.0 | NaN |
| 5 | 212622 | J. Kimmich | 27 | https://cdn.sofifa.net/players/212/622/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 89 | 90 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos9">RDM | Jul 1, 2015 | NaN | 2025 | 177cm | 75kg | €182M | 6.0 | NaN |
| 6 | 197445 | D. Alaba | 30 | https://cdn.sofifa.net/players/197/445/23_60.png | Austria | https://cdn.sofifa.net/flags/at.png | 86 | 86 | Real Madrid CF | https://cdn.sofifa.net/teams/243/30.png | ... | Yes | <span class="pos pos6">LCB | Jul 1, 2021 | NaN | 2026 | 180cm | 78kg | €113.8M | 4.0 | NaN |
| 7 | 187961 | 22 Paulinho | 32 | https://cdn.sofifa.net/players/187/961/22_60.png | Brazil | https://cdn.sofifa.net/flags/br.png | 83 | 83 | Al Ahli | https://cdn.sofifa.net/teams/112387/30.png | ... | Yes | <span class="pos pos15">LCM | Jul 22, 2021 | NaN | 2024 | 183cm | 80kg | €48.5M | 15.0 | NaN |
| 8 | 208333 | E. Can | 28 | https://cdn.sofifa.net/players/208/333/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 82 | 82 | Borussia Dortmund | https://cdn.sofifa.net/teams/22/30.png | ... | Yes | <span class="pos pos28">SUB | Feb 18, 2020 | NaN | 2024 | 186cm | 86kg | €51.9M | 23.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | Deng Xiongtao | 19 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | ... | No | <span class="pos pos29">RES | Apr 11, 2022 | NaN | 2027 | 190cm | 78kg | €218K | 35.0 | NaN |
| 17656 | 267946 | 22 Lim Jun Sub | 17 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2022 | NaN | 2026 | 195cm | 84kg | €188K | 21.0 | NaN |
| 17657 | 270567 | A. Demir | 25 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | ... | No | <span class="pos pos29">RES | Jun 6, 2021 | NaN | 2023 | 190cm | 82kg | €142K | 12.0 | NaN |
| 17658 | 256624 | 21 S. Czajor | 18 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2020 | NaN | 2021 | 187cm | 79kg | €214K | 40.0 | NaN |
| 17659 | 256376 | 21 F. Jakobsson | 20 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | ... | No | <span class="pos pos29">RES | Jan 8, 2020 | NaN | 2021 | 186cm | 78kg | €131K | 30.0 | NaN |
17656 rows × 29 columns
- 예시 2 : ['Name', 'Age']열을 drop
df.drop(columns = ['Name', 'Age'])
## df.drop(['Name', 'Age'], axis = 1)| ID | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | Value | Wage | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | €91M | €115K | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189cm | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | €78.5M | €190K | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179cm | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | €46.5M | €46K | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172cm | 69kg | €97.7M | 19.0 | NaN |
| 3 | 192985 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | €107.5M | €350K | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181cm | 70kg | €198.9M | 17.0 | NaN |
| 4 | 224232 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | €89.5M | €110K | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172cm | 68kg | €154.4M | 23.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | €100K | €500 | ... | No | <span class="pos pos29">RES | Apr 11, 2022 | NaN | 2027 | 190cm | 78kg | €218K | 35.0 | NaN |
| 17656 | 267946 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | €100K | €500 | ... | No | <span class="pos pos29">RES | Jan 1, 2022 | NaN | 2026 | 195cm | 84kg | €188K | 21.0 | NaN |
| 17657 | 270567 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | €70K | €2K | ... | No | <span class="pos pos29">RES | Jun 6, 2021 | NaN | 2023 | 190cm | 82kg | €142K | 12.0 | NaN |
| 17658 | 256624 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | €90K | €500 | ... | No | <span class="pos pos29">RES | Jan 1, 2020 | NaN | 2021 | 187cm | 79kg | €214K | 40.0 | NaN |
| 17659 | 256376 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | €90K | €500 | ... | No | <span class="pos pos29">RES | Jan 8, 2020 | NaN | 2021 | 186cm | 78kg | €131K | 30.0 | NaN |
17660 rows × 27 columns
결국에 axis 옵션만 기억하면 다른 parameter를 기억하지 않아도 된다.
# G~H 에 대한 연습문제
# 예제: 결측치가 50퍼 이상인 열을 제외하라.
- 풀이 1 : 무지성 직접 제외
df.isna().mean() ## := df.isna().sum() / len(df). SeriesID 0.000000
Name 0.000000
Age 0.000000
Photo 0.000000
Nationality 0.000000
Flag 0.000000
Overall 0.000000
Potential 0.000000
Club 0.011948
Club Logo 0.000000
Value 0.000000
Wage 0.000000
Special 0.000000
Preferred Foot 0.000000
International Reputation 0.000000
Weak Foot 0.000000
Skill Moves 0.000000
Work Rate 0.000000
Body Type 0.002152
Real Face 0.002152
Position 0.001982
Joined 0.062174
Loaned From 0.960702
Contract Valid Until 0.020442
Height 0.000000
Weight 0.000000
Release Clause 0.065176
Kit Number 0.001982
Best Overall Rating 0.998811
dtype: float64
df.drop(columns=['Loaned From','Best Overall Rating'])| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Work Rate | Body Type | Real Face | Position | Joined | Contract Valid Until | Height | Weight | Release Clause | Kit Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | High/ Medium | Unique | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | 2026 | 189cm | 82kg | €157M | 8.0 |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | High/ High | Unique | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | 2026 | 179cm | 69kg | €155M | 8.0 |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | High/ High | Stocky (170-185) | No | <span class="pos pos7">LB | Sep 14, 2020 | 2024 | 172cm | 69kg | €97.7M | 19.0 |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | High/ High | Unique | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | 2025 | 181cm | 70kg | €198.9M | 17.0 |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | High/ High | Normal (170-) | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | 2026 | 172cm | 68kg | €154.4M | 23.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | Deng Xiongtao | 19 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Apr 11, 2022 | 2027 | 190cm | 78kg | €218K | 35.0 |
| 17656 | 267946 | 22 Lim Jun Sub | 17 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | ... | Medium/ Medium | Lean (185+) | No | <span class="pos pos29">RES | Jan 1, 2022 | 2026 | 195cm | 84kg | €188K | 21.0 |
| 17657 | 270567 | A. Demir | 25 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | ... | Medium/ Medium | Lean (185+) | No | <span class="pos pos29">RES | Jun 6, 2021 | 2023 | 190cm | 82kg | €142K | 12.0 |
| 17658 | 256624 | 21 S. Czajor | 18 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Jan 1, 2020 | 2021 | 187cm | 79kg | €214K | 40.0 |
| 17659 | 256376 | 21 F. Jakobsson | 20 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Jan 8, 2020 | 2021 | 186cm | 78kg | €131K | 30.0 |
17660 rows × 27 columns
- 풀이 2 : 이성적인 풀이
df.loc[:, df.isna().mean() < 0.5] ## 시리즈니까 리스트로 묶지 말것!!| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Work Rate | Body Type | Real Face | Position | Joined | Contract Valid Until | Height | Weight | Release Clause | Kit Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | High/ Medium | Unique | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | 2026 | 189cm | 82kg | €157M | 8.0 |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | High/ High | Unique | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | 2026 | 179cm | 69kg | €155M | 8.0 |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | High/ High | Stocky (170-185) | No | <span class="pos pos7">LB | Sep 14, 2020 | 2024 | 172cm | 69kg | €97.7M | 19.0 |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | High/ High | Unique | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | 2025 | 181cm | 70kg | €198.9M | 17.0 |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | High/ High | Normal (170-) | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | 2026 | 172cm | 68kg | €154.4M | 23.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | Deng Xiongtao | 19 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Apr 11, 2022 | 2027 | 190cm | 78kg | €218K | 35.0 |
| 17656 | 267946 | 22 Lim Jun Sub | 17 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | ... | Medium/ Medium | Lean (185+) | No | <span class="pos pos29">RES | Jan 1, 2022 | 2026 | 195cm | 84kg | €188K | 21.0 |
| 17657 | 270567 | A. Demir | 25 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | ... | Medium/ Medium | Lean (185+) | No | <span class="pos pos29">RES | Jun 6, 2021 | 2023 | 190cm | 82kg | €142K | 12.0 |
| 17658 | 256624 | 21 S. Czajor | 18 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Jan 1, 2020 | 2021 | 187cm | 79kg | €214K | 40.0 |
| 17659 | 256376 | 21 F. Jakobsson | 20 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | ... | Medium/ Medium | Normal (185+) | No | <span class="pos pos29">RES | Jan 8, 2020 | 2021 | 186cm | 78kg | €131K | 30.0 |
17660 rows × 27 columns
3. Pandas : missing
### A. Numpy
- 발생 : np.nan
np.nannan
arr = np.array([1,2,3,np.nan])
arrarray([ 1., 2., 3., nan])
arr.mean()nan
print(type(np.nan)) ## np.nan 자체는 일종의 float로 취급된다.
print(type(arr[0]))<class 'float'>
<class 'numpy.float64'>
### B. Pandas
- 발생 : np.nan, pd.NA
pd.Series([np.nan,1,2,3])0 NaN
1 1.0
2 2.0
3 3.0
dtype: float64
pd.Series([pd.NA, 1, 2, 3])0 <NA>
1 1
2 2
3 3
dtype: object
위 두 개의 코드는 동일하다고 봐도 무방하다.
- pd.Serise에 NaN 또는 <NA>가 있다면 연산할 때 제외함
print(f'np.nan을 넣은 시리즈의 평균 : {pd.Series([np.nan,1,2,3]).mean()} = pd.NA를 넣은 시리즈의 평균 : {pd.Series([pd.NA,1,2,3]).mean()}')np.nan을 넣은 시리즈의 평균 : 2.0 = pd.NA를 넣은 시리즈의 평균 : 2.0
s1 = pd.Series([np.nan,1,2,3])
type(s1[0])numpy.float64
s2 = pd.Series([pd.NA, 1,2,3])
type(s2[0])pandas._libs.missing.NAType
missing은 그냥 NaN이라고 보자.
- 검출(\(\star\))(중요한가?)
s1.isna()0 True
1 False
2 False
3 False
dtype: bool
s2.isna()0 True
1 False
2 False
3 False
dtype: bool
pd.isna(s1[0]), pd.isnull(s1[0]) ## 결측치가 있느냐?(True, True)
pd.isna(s2[0]), pd.isnull(s2[0]) ## 결측치가 있느냐?(True, True)
id(pd.isna), id(pd.isnull) # 같은함수(135146401797248, 135146401797248)
id를 찍었을 때 같다면 같은 함수이다.
5. Pandas : query
개 간단하고 쉽지만 고점은 낮은 데이터 처리방식
ts = pd.DataFrame(np.random.normal(size=(20,4)),columns=list('ABCD'),index=pd.date_range('20221226',periods=20)).assign(E=['A']*10+['B']*10)
ts| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-26 | -1.027712 | -0.590487 | 1.580671 | -0.315109 | A |
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-28 | 0.625888 | -1.711870 | 0.573349 | 0.040879 | A |
| 2022-12-29 | -1.494778 | -0.333769 | 0.028889 | 0.984416 | A |
| 2022-12-30 | 2.573588 | -0.005872 | 0.868897 | 0.932830 | A |
| 2022-12-31 | 0.146699 | -0.306216 | 1.241642 | -1.008297 | A |
| 2023-01-01 | 1.105096 | -0.492485 | 0.865509 | -0.383760 | A |
| 2023-01-02 | -1.136712 | 0.595607 | -1.938775 | 0.201931 | A |
| 2023-01-03 | 0.118754 | 0.119941 | -0.828199 | -1.356401 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
| 2023-01-05 | 0.621326 | 0.150997 | -0.479691 | 0.810434 | B |
| 2023-01-06 | -0.095612 | -0.692796 | 0.456627 | -0.395918 | B |
| 2023-01-07 | 1.117905 | 0.431402 | -0.235017 | 0.897339 | B |
| 2023-01-08 | -0.939328 | 0.745621 | 0.632724 | 0.032088 | B |
| 2023-01-09 | 1.158532 | -2.312485 | -1.292257 | -1.325453 | B |
| 2023-01-10 | -0.339565 | -0.460976 | 0.320097 | 0.482333 | B |
| 2023-01-11 | -0.117493 | -1.964531 | -1.867120 | 2.325713 | B |
| 2023-01-12 | 0.574654 | 0.984037 | 0.641058 | 0.264561 | B |
| 2023-01-13 | -0.252865 | 0.519606 | 0.373864 | 0.520175 | B |
| 2023-01-14 | -1.069801 | -0.659982 | -0.368828 | 1.286645 | B |
### A. 기본 query
- 예시1: A>0 and B<0
ts.query('A>0 and B<0')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-28 | 0.625888 | -1.711870 | 0.573349 | 0.040879 | A |
| 2022-12-30 | 2.573588 | -0.005872 | 0.868897 | 0.932830 | A |
| 2022-12-31 | 0.146699 | -0.306216 | 1.241642 | -1.008297 | A |
| 2023-01-01 | 1.105096 | -0.492485 | 0.865509 | -0.383760 | A |
| 2023-01-09 | 1.158532 | -2.312485 | -1.292257 | -1.325453 | B |
- 예시2: A<B<C
ts.query('A<B<C')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-26 | -1.027712 | -0.590487 | 1.580671 | -0.315109 | A |
| 2022-12-29 | -1.494778 | -0.333769 | 0.028889 | 0.984416 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
| 2023-01-14 | -1.069801 | -0.659982 | -0.368828 | 1.286645 | B |
- 예시3: (A+B/2) > 0
ts.query('(A+B)/2 > 0')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-30 | 2.573588 | -0.005872 | 0.868897 | 0.932830 | A |
| 2023-01-01 | 1.105096 | -0.492485 | 0.865509 | -0.383760 | A |
| 2023-01-03 | 0.118754 | 0.119941 | -0.828199 | -1.356401 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
| 2023-01-05 | 0.621326 | 0.150997 | -0.479691 | 0.810434 | B |
| 2023-01-07 | 1.117905 | 0.431402 | -0.235017 | 0.897339 | B |
| 2023-01-12 | 0.574654 | 0.984037 | 0.641058 | 0.264561 | B |
| 2023-01-13 | -0.252865 | 0.519606 | 0.373864 | 0.520175 | B |
- 예시4: (A+B/2) > 0 and E=='A'
ts.query('(A+B)/2 > 0 and E == "A"') ## string 조건을 넣어주고 싶으면 다른 따옴표로 구분하면 된다.| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-30 | 2.573588 | -0.005872 | 0.868897 | 0.932830 | A |
| 2023-01-01 | 1.105096 | -0.492485 | 0.865509 | -0.383760 | A |
| 2023-01-03 | 0.118754 | 0.119941 | -0.828199 | -1.356401 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
그냥 스트링으로 된 것들 중에는 생각헀던 건 왠만해선 다 된다.
### B. 외부변수를 이용
- 예시1: A > mean(A)
mean = ts.A.mean()
mean0.14414224086779243
#ts.query('A > np.mean(A)') ## 이건 안됨
#ts.query('A > A.mean()') ## 이건 되긴 함
#ts.query('A > mean') ## column 중 하나인지 뭔지 헷갈림, 그래서 안됨
ts.query('A > @mean')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-28 | 0.625888 | -1.711870 | 0.573349 | 0.040879 | A |
| 2022-12-30 | 2.573588 | -0.005872 | 0.868897 | 0.932830 | A |
| 2022-12-31 | 0.146699 | -0.306216 | 1.241642 | -1.008297 | A |
| 2023-01-01 | 1.105096 | -0.492485 | 0.865509 | -0.383760 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
| 2023-01-05 | 0.621326 | 0.150997 | -0.479691 | 0.810434 | B |
| 2023-01-07 | 1.117905 | 0.431402 | -0.235017 | 0.897339 | B |
| 2023-01-09 | 1.158532 | -2.312485 | -1.292257 | -1.325453 | B |
| 2023-01-12 | 0.574654 | 0.984037 | 0.641058 | 0.264561 | B |
A.mean()보다 작은 값들을 산출했다.
value = np.percentile(ts.B, 77) ## ts.B에서 77백분위수에 해당하는 숫자
ts.query('B > @value')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2023-01-02 | -1.136712 | 0.595607 | -1.938775 | 0.201931 | A |
| 2023-01-04 | 0.673908 | 1.199221 | 1.454181 | -0.370048 | A |
| 2023-01-08 | -0.939328 | 0.745621 | 0.632724 | 0.032088 | B |
| 2023-01-12 | 0.574654 | 0.984037 | 0.641058 | 0.264561 | B |
| 2023-01-13 | -0.252865 | 0.519606 | 0.373864 | 0.520175 | B |
### C. Index로 query
- 예시: (2022년 12월30일 보다 이전 날짜) \(\cup\) (2023년 1월10일)
ts.query('index < "2022-12-30" or index == "2023-01-10"')| A | B | C | D | E | |
|---|---|---|---|---|---|
| 2022-12-26 | -1.027712 | -0.590487 | 1.580671 | -0.315109 | A |
| 2022-12-27 | 0.640362 | -0.520975 | -0.607376 | -0.560362 | A |
| 2022-12-28 | 0.625888 | -1.711870 | 0.573349 | 0.040879 | A |
| 2022-12-29 | -1.494778 | -0.333769 | 0.028889 | 0.984416 | A |
| 2023-01-10 | -0.339565 | -0.460976 | 0.320097 | 0.482333 | B |
### D. 열의 이름에 공백이 있을 경우
열의 이름에 공백이 있으면 백틱을 이용하면 된다.
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
df.columnsIndex(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
'Preferred Foot', 'International Reputation', 'Weak Foot',
'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
'Joined', 'Loaned From', 'Contract Valid Until', 'Height', 'Weight',
'Release Clause', 'Kit Number', 'Best Overall Rating'],
dtype='object')
df.query('`Skill Moves` > 4') ## `를 사용| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19 | 193082 | J. Cuadrado | 34 | https://cdn.sofifa.net/players/193/082/23_60.png | Colombia | https://cdn.sofifa.net/flags/co.png | 83 | 83 | Juventus | https://cdn.sofifa.net/teams/45/30.png | ... | Yes | <span class="pos pos3">RB | Jul 1, 2017 | NaN | 2023 | 179cm | 72kg | €23M | 11.0 | NaN |
| 27 | 189509 | Thiago | 31 | https://cdn.sofifa.net/players/189/509/23_60.png | Spain | https://cdn.sofifa.net/flags/es.png | 86 | 86 | Liverpool | https://cdn.sofifa.net/teams/9/30.png | ... | Yes | <span class="pos pos15">LCM | Sep 18, 2020 | NaN | 2024 | 174cm | 70kg | €102.7M | 6.0 | NaN |
| 44 | 232411 | C. Nkunku | 24 | https://cdn.sofifa.net/players/232/411/23_60.png | France | https://cdn.sofifa.net/flags/fr.png | 86 | 89 | RB Leipzig | https://cdn.sofifa.net/teams/112172/30.png | ... | Yes | <span class="pos pos28">SUB | NaN | NaN | NaN | 175cm | 73kg | €166.9M | 12.0 | NaN |
| 62 | 233927 | Lucas Paquetá | 24 | https://cdn.sofifa.net/players/233/927/23_60.png | Brazil | https://cdn.sofifa.net/flags/br.png | 82 | 87 | Olympique Lyonnais | https://cdn.sofifa.net/teams/66/30.png | ... | Yes | <span class="pos pos15">LCM | Oct 1, 2020 | NaN | 2025 | 180cm | 72kg | €90.9M | 10.0 | NaN |
| 75 | 231747 | K. Mbappé | 23 | https://cdn.sofifa.net/players/231/747/23_60.png | France | https://cdn.sofifa.net/flags/fr.png | 91 | 95 | Paris Saint-Germain | https://cdn.sofifa.net/teams/73/30.png | ... | Yes | <span class="pos pos25">ST | Jul 1, 2018 | NaN | 2025 | 182cm | 73kg | €366.7M | 7.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4516 | 253755 | Talles Magno | 20 | https://cdn.sofifa.net/players/253/755/23_60.png | Brazil | https://cdn.sofifa.net/flags/br.png | 71 | 83 | New York City FC | https://cdn.sofifa.net/teams/112828/30.png | ... | No | <span class="pos pos16">LM | May 18, 2021 | NaN | 2026 | 186cm | 70kg | €7.7M | 43.0 | NaN |
| 4643 | 246548 | O. Sahraoui | 21 | https://cdn.sofifa.net/players/246/548/23_60.png | Norway | https://cdn.sofifa.net/flags/no.png | 67 | 78 | Vålerenga Fotball | https://cdn.sofifa.net/teams/920/30.png | ... | No | <span class="pos pos27">LW | May 15, 2019 | NaN | 2023 | 170cm | 65kg | €3.3M | 10.0 | NaN |
| 4872 | 251570 | R. Cherki | 18 | https://cdn.sofifa.net/players/251/570/23_60.png | France | https://cdn.sofifa.net/flags/fr.png | 73 | 88 | Olympique Lyonnais | https://cdn.sofifa.net/teams/66/30.png | ... | No | <span class="pos pos28">SUB | Jul 7, 2019 | NaN | 2023 | 176cm | 71kg | €17.7M | 18.0 | NaN |
| 5361 | 225712 | D. Bahamboula | 27 | https://cdn.sofifa.net/players/225/712/23_60.png | Congo | https://cdn.sofifa.net/flags/cg.png | 63 | 63 | Livingston FC | https://cdn.sofifa.net/teams/621/30.png | ... | No | <span class="pos pos28">SUB | Jul 9, 2022 | NaN | 2024 | 185cm | 70kg | €875K | 7.0 | NaN |
| 10452 | 212455 | 17 H. Mastour | 18 | https://cdn.sofifa.net/players/212/455/17_60.png | Morocco | https://cdn.sofifa.net/flags/ma.png | 65 | 76 | PEC Zwolle | https://cdn.sofifa.net/teams/1914/30.png | ... | No | <span class="pos pos28">SUB | NaN | <a href="/team/47/ac-milan/">AC Milan</a> | Jun 30, 2017 | 175cm | 63kg | NaN | 98.0 | NaN |
65 rows × 29 columns
5. Pandas : 할당
아래와 같은 자료를 조건에 맞게 가공해서 새로운 열을 추가해보자.
np.random.seed(43052)
att = np.random.choice(np.arange(10,21)*5,20)
rep = np.random.choice(np.arange(5,21)*5,20)
mid = np.random.choice(np.arange(0,21)*5,20)
fin = np.random.choice(np.arange(0,21)*5,20)
df = pd.DataFrame({'att':att,'rep':rep,'mid':mid,'fin':fin})
df| att | rep | mid | fin | |
|---|---|---|---|---|
| 0 | 65 | 55 | 50 | 40 |
| 1 | 95 | 100 | 50 | 80 |
| 2 | 65 | 90 | 60 | 30 |
| 3 | 55 | 80 | 75 | 80 |
| 4 | 80 | 30 | 30 | 100 |
| 5 | 75 | 40 | 100 | 15 |
| 6 | 65 | 45 | 45 | 90 |
| 7 | 60 | 60 | 25 | 0 |
| 8 | 95 | 65 | 20 | 10 |
| 9 | 90 | 80 | 80 | 20 |
| 10 | 55 | 75 | 35 | 25 |
| 11 | 95 | 95 | 45 | 0 |
| 12 | 95 | 55 | 15 | 35 |
| 13 | 50 | 80 | 40 | 30 |
| 14 | 50 | 55 | 15 | 85 |
| 15 | 95 | 30 | 30 | 95 |
| 16 | 50 | 50 | 45 | 10 |
| 17 | 65 | 55 | 15 | 45 |
| 18 | 70 | 70 | 40 | 35 |
| 19 | 90 | 90 | 80 | 90 |
### A. df.assign()
- 예시: total = att*0.1 + rep*0.2 + mid*0.35 + fin*0.35 를 계산하여 할당
df.assign(total = df.att*0.1 + df.rep*0.2 + df.mid*0.35 + df.fin*0.35)
## 원래 데이터 손상시키지 않음
df_total = df.assign(total = df.att*0.1 + df.rep*0.2 + df.mid*0.35 + df.fin*0.35)
df_total| att | rep | mid | fin | total | |
|---|---|---|---|---|---|
| 0 | 65 | 55 | 50 | 40 | 49.00 |
| 1 | 95 | 100 | 50 | 80 | 75.00 |
| 2 | 65 | 90 | 60 | 30 | 56.00 |
| 3 | 55 | 80 | 75 | 80 | 75.75 |
| 4 | 80 | 30 | 30 | 100 | 59.50 |
| 5 | 75 | 40 | 100 | 15 | 55.75 |
| 6 | 65 | 45 | 45 | 90 | 62.75 |
| 7 | 60 | 60 | 25 | 0 | 26.75 |
| 8 | 95 | 65 | 20 | 10 | 33.00 |
| 9 | 90 | 80 | 80 | 20 | 60.00 |
| 10 | 55 | 75 | 35 | 25 | 41.50 |
| 11 | 95 | 95 | 45 | 0 | 44.25 |
| 12 | 95 | 55 | 15 | 35 | 38.00 |
| 13 | 50 | 80 | 40 | 30 | 45.50 |
| 14 | 50 | 55 | 15 | 85 | 51.00 |
| 15 | 95 | 30 | 30 | 95 | 59.25 |
| 16 | 50 | 50 | 45 | 10 | 34.25 |
| 17 | 65 | 55 | 15 | 45 | 38.50 |
| 18 | 70 | 70 | 40 | 35 | 47.25 |
| 19 | 90 | 90 | 80 | 90 | 86.50 |
### B. df.eval()
- A에서와 동일한 할당
df.eval('total = att*0.1 + rep*0.2 + mid*0.3 + fin*0.4') ## query를 쓰는 것처럼, 원본 데이터를 변화시키지 않음| att | rep | mid | fin | total | |
|---|---|---|---|---|---|
| 0 | 65 | 55 | 50 | 40 | 48.5 |
| 1 | 95 | 100 | 50 | 80 | 76.5 |
| 2 | 65 | 90 | 60 | 30 | 54.5 |
| 3 | 55 | 80 | 75 | 80 | 76.0 |
| 4 | 80 | 30 | 30 | 100 | 63.0 |
| 5 | 75 | 40 | 100 | 15 | 51.5 |
| 6 | 65 | 45 | 45 | 90 | 65.0 |
| 7 | 60 | 60 | 25 | 0 | 25.5 |
| 8 | 95 | 65 | 20 | 10 | 32.5 |
| 9 | 90 | 80 | 80 | 20 | 57.0 |
| 10 | 55 | 75 | 35 | 25 | 41.0 |
| 11 | 95 | 95 | 45 | 0 | 42.0 |
| 12 | 95 | 55 | 15 | 35 | 39.0 |
| 13 | 50 | 80 | 40 | 30 | 45.0 |
| 14 | 50 | 55 | 15 | 85 | 54.5 |
| 15 | 95 | 30 | 30 | 95 | 62.5 |
| 16 | 50 | 50 | 45 | 10 | 32.5 |
| 17 | 65 | 55 | 15 | 45 | 40.0 |
| 18 | 70 | 70 | 40 | 35 | 47.0 |
| 19 | 90 | 90 | 80 | 90 | 87.0 |
but, 사칙연산과 같은 기초연산 수준에서만 잘 작동한다.
### C. df[colname] = xxx
별로 안쓰는 방법
df['total'] = df.att*0.1 + df.rep*0.2 + df.mid*0.3 + df.fin*0.4 ## 원래의 데이터프레임을 손상시킨다.
df| att | rep | mid | fin | total | |
|---|---|---|---|---|---|
| 0 | 65 | 55 | 50 | 40 | 48.5 |
| 1 | 95 | 100 | 50 | 80 | 76.5 |
| 2 | 65 | 90 | 60 | 30 | 54.5 |
| 3 | 55 | 80 | 75 | 80 | 76.0 |
| 4 | 80 | 30 | 30 | 100 | 63.0 |
| 5 | 75 | 40 | 100 | 15 | 51.5 |
| 6 | 65 | 45 | 45 | 90 | 65.0 |
| 7 | 60 | 60 | 25 | 0 | 25.5 |
| 8 | 95 | 65 | 20 | 10 | 32.5 |
| 9 | 90 | 80 | 80 | 20 | 57.0 |
| 10 | 55 | 75 | 35 | 25 | 41.0 |
| 11 | 95 | 95 | 45 | 0 | 42.0 |
| 12 | 95 | 55 | 15 | 35 | 39.0 |
| 13 | 50 | 80 | 40 | 30 | 45.0 |
| 14 | 50 | 55 | 15 | 85 | 54.5 |
| 15 | 95 | 30 | 30 | 95 | 62.5 |
| 16 | 50 | 50 | 45 | 10 | 32.5 |
| 17 | 65 | 55 | 15 | 45 | 40.0 |
| 18 | 70 | 70 | 40 | 35 | 47.0 |
| 19 | 90 | 90 | 80 | 90 | 87.0 |
6. Pandas : transform column(\(\star\star\star\))
### A. lambda
- 예시1: \(x \to x+2\)
"""
def f(x) :
return x + 2
해당 코드와 동일하다.
"""
f = lambda x: x+2
print(f(3))
print((lambda x : x+2)(3)) ## (lambda x : x+2) 자체가 함수이므로, 뒤에 변수만 지정해주면 된다.5
5
- 예시2: \(x,y \to x+y\)
(lambda x,y : x+y)(1,3)4
- 예시3: ‘2023-09’ \(\to\) 9 (format : int)
(lambda x : int(x[-2:]))('2023-09') ## -1번째(뒤에서 두번째 원소까지 추출)9
- 예시4: ‘2023-09’ \(\to\) (2023, 9) (format : tuple)
(lambda x : (int(x[:4]), int(x[-2:])))('2023-09')(2023, 9)
- 예시5: 문자열이 ‘cat’이면 1 ’dog’ 이면 0 // ’cat이면 1 ’cat’이 아니면 0
(lambda x : 1 if x == 'cat' else 0)('cat')
## (lambda x : pd.Series(x == 'cat').sum())('cat') ## 이것도 된다.1
### B. map
함수의 output들을 엮는다. 매핑하는 거
:- 개념: map(f,[x1,x2,...xn])=[f(x1),f(x2),...,f(xn)]
- 예시1: x->x+1을 [1,2,3]에 적용
f = lambda x: x+1
list(map(f,[1,2,3]))[2, 3, 4]
list(map(lambda x : x + 1, [1,2,3]))[2, 3, 4]
매핑하는 것 자체는 수나 리스트가 아니기 때문에 리스트로 엮어줘야 값을 알 수 있다.
- 예시2 df.Height열 변환하기 (xxxcm 라고 적혀있는 것을 cm 없애고 키만 뽑기)
s.str.replace('cm', '')
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
s = df.Height[:5]
s0 189cm
1 179cm
2 172cm
3 181cm
4 172cm
Name: Height, dtype: object
list(map(lambda x : int(x[:-2]), s))[189, 179, 172, 181, 172]
- 풀이 1 : map 이용
df.assign(Height = list(map(lambda x: int(x.replace('cm','')), df.Height)))| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189 | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179 | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172 | 69kg | €97.7M | 19.0 | NaN |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181 | 70kg | €198.9M | 17.0 | NaN |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172 | 68kg | €154.4M | 23.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | Deng Xiongtao | 19 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | ... | No | <span class="pos pos29">RES | Apr 11, 2022 | NaN | 2027 | 190 | 78kg | €218K | 35.0 | NaN |
| 17656 | 267946 | 22 Lim Jun Sub | 17 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2022 | NaN | 2026 | 195 | 84kg | €188K | 21.0 | NaN |
| 17657 | 270567 | A. Demir | 25 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | ... | No | <span class="pos pos29">RES | Jun 6, 2021 | NaN | 2023 | 190 | 82kg | €142K | 12.0 | NaN |
| 17658 | 256624 | 21 S. Czajor | 18 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2020 | NaN | 2021 | 187 | 79kg | €214K | 40.0 | NaN |
| 17659 | 256376 | 21 F. Jakobsson | 20 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | ... | No | <span class="pos pos29">RES | Jan 8, 2020 | NaN | 2021 | 186 | 78kg | €131K | 30.0 | NaN |
17660 rows × 29 columns
- 풀이 2 : 사실 수틀리면 컴프리헨션 쓰면 된다.
df.assign(Height = [int(s.replace('cm', '')) for s in df.Height])| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189 | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179 | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172 | 69kg | €97.7M | 19.0 | NaN |
| 3 | 192985 | K. De Bruyne | 31 | https://cdn.sofifa.net/players/192/985/23_60.png | Belgium | https://cdn.sofifa.net/flags/be.png | 91 | 91 | Manchester City | https://cdn.sofifa.net/teams/10/30.png | ... | Yes | <span class="pos pos13">RCM | Aug 30, 2015 | NaN | 2025 | 181 | 70kg | €198.9M | 17.0 | NaN |
| 4 | 224232 | N. Barella | 25 | https://cdn.sofifa.net/players/224/232/23_60.png | Italy | https://cdn.sofifa.net/flags/it.png | 86 | 89 | Inter | https://cdn.sofifa.net/teams/44/30.png | ... | Yes | <span class="pos pos13">RCM | Sep 1, 2020 | NaN | 2026 | 172 | 68kg | €154.4M | 23.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 17655 | 269526 | Deng Xiongtao | 19 | https://cdn.sofifa.net/players/269/526/23_60.png | China PR | https://cdn.sofifa.net/flags/cn.png | 48 | 61 | Meizhou Hakka | https://cdn.sofifa.net/teams/114628/30.png | ... | No | <span class="pos pos29">RES | Apr 11, 2022 | NaN | 2027 | 190 | 78kg | €218K | 35.0 | NaN |
| 17656 | 267946 | 22 Lim Jun Sub | 17 | https://cdn.sofifa.net/players/267/946/22_60.png | Korea Republic | https://cdn.sofifa.net/flags/kr.png | 48 | 64 | Jeju United FC | https://cdn.sofifa.net/teams/1478/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2022 | NaN | 2026 | 195 | 84kg | €188K | 21.0 | NaN |
| 17657 | 270567 | A. Demir | 25 | https://cdn.sofifa.net/players/270/567/23_60.png | Turkey | https://cdn.sofifa.net/flags/tr.png | 51 | 56 | Ümraniyespor | https://cdn.sofifa.net/teams/113796/30.png | ... | No | <span class="pos pos29">RES | Jun 6, 2021 | NaN | 2023 | 190 | 82kg | €142K | 12.0 | NaN |
| 17658 | 256624 | 21 S. Czajor | 18 | https://cdn.sofifa.net/players/256/624/21_60.png | Poland | https://cdn.sofifa.net/flags/pl.png | 50 | 65 | Fleetwood Town | https://cdn.sofifa.net/teams/112260/30.png | ... | No | <span class="pos pos29">RES | Jan 1, 2020 | NaN | 2021 | 187 | 79kg | €214K | 40.0 | NaN |
| 17659 | 256376 | 21 F. Jakobsson | 20 | https://cdn.sofifa.net/players/256/376/21_60.png | Sweden | https://cdn.sofifa.net/flags/se.png | 50 | 61 | IFK Norrköping | https://cdn.sofifa.net/teams/702/30.png | ... | No | <span class="pos pos29">RES | Jan 8, 2020 | NaN | 2021 | 186 | 78kg | €131K | 30.0 | NaN |
17660 rows × 29 columns
# 예시4 – df.Position 열에 아래와 같은 변환을 수행하고, 변환된 열을 할당하라.
| before | after |
|---|---|
<span class="pos pos28">SUB |
SUB |
<span class="pos pos15">LCM |
LCM |
<span class="pos pos7">LB |
LB |
<span class="pos pos13">RCM |
RCM |
<span class="pos pos13">RCM |
RCM |
- 풀이 1
- 데이터를 불러와서…
list(map(lambda x : x.str.split('>')[-1] if x.isna() == False else pd.NA, df.Position))df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')- 저장된 꼬라지를 보면…
x = df.Position[0]
x'<span class="pos pos28">SUB'
- 게다가 결측치까지 있네???
df.Position.isna().sum()35
- 결측치 처리 + 데이터 변환
df.assign(Position = list(map(lambda x : x.split('>')[-1] if not pd.isna(x) else pd.NA, df.Position))).Position0 SUB
1 LCM
2 LB
3 RCM
4 RCM
...
17655 RES
17656 RES
17657 RES
17658 RES
17659 RES
Name: Position, Length: 17660, dtype: object
- (풀이2) – 수틀리면 리스트컴프리헨션
f = lambda x: x.split(">")[-1] if not pd.isna(x) else pd.NAdf.assign(Position = [f(s) for s in df.Position]).Position0 SUB
1 LCM
2 LB
3 RCM
4 RCM
...
17655 RES
17656 RES
17657 RES
17658 RES
17659 RES
Name: Position, Length: 17660, dtype: object
mapping하는 게 조금 더 자연스럽고 한번에 쓸 수 있다. ~(어차피 이미 람다로 함수 만들었잖아?)~